*-------------------------------------------------------------------------------
*						Table 1 
*-------------------------------------------------------------------------------

* Project directories (absolute Windows paths)
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

* File names given without a directory below resolve against the lab directory
cd "$Work_lab"

* Close any log that is still open, then start a fresh log in the output directory
capture log close
log using "$Out_lab\Tab 1", replace

* Do not pause long output
set more off


*-------------------------------------------------------------------------------*
*					Step1: Generate Data
*					(1) Across Occupations
*-------------------------------------------------------------------------------
clear all

* Empty accumulator file; every country-year block below is appended to it
set obs 0
save tabledata1, replace emptyok

* Chinese census data: recode occupations to the crosswalk category
* (variable 最终调整过后的) and sum the occupation shares within each category
foreach yr of numlist 2000 2010 2015 {
	use whole_occ`yr', clear
	keep occ_number occ_name share

	* The crosswalk file is keyed on these two names
	rename occ_number consistent
	rename occ_name   title_consistent

	merge 1:1 consistent title_consistent using "CHN-SOC-consistent.dta"

	keep share 最终调整过后的

	* Categories with no population in the consistent USA-CHN classification get share 0
	replace share = 0 if missing(share)
	collapse (sum) share, by(最终调整过后的)

	gen year    = `yr'
	gen country = "CHN"

	* Stack on top of what has been accumulated so far and write it back
	append using tabledata1
	save tabledata1, replace
}


* USA ACS data: occupation shares by ACS year, recoded to the crosswalk category

use "$App_data\ACS\usa_00003.dta", clear

* Translate the sample code into the ACS year (200004 is the 2000 ACS);
* every other sample gets a missing year and is dropped
gen acs = cond(sample == 200004, 2000, ///
          cond(sample == 201001, 2010, ///
          cond(sample == 201501, 2015, .)))
keep if !missing(acs)

* Drop the unemployed: strip blanks from the occupation code, then drop code 0
* NOTE(review): the within-occupation ACS step later in this file also drops
* empty occsoc values; here they are kept. Confirm the difference is intended.
replace occsoc = subinstr(occsoc, " ", "", .)
drop if occsoc == "0"

* Restrict to ages 15-64
keep if inrange(age, 15, 64)

* Record counts per occupation code and year, converted to percent shares within year
* NOTE(review): the counts are unweighted record counts - confirm that no
* person weight should be applied.
gen byte one = 1
collapse (count) num = one, by(occsoc acs)
bysort acs: egen total = total(num)
gen share = (num / total) * 100
drop num total
save acs_structure.dta, replace

foreach yr of numlist 2000 2010 2015 {
	use "acs_structure.dta", clear
	keep if acs == `yr'

	merge 1:1 occsoc using "USA-SOC-consistent`yr'.dta"

	* Rows left without a category are pooled under one catch-all label
	replace 最终调整过后的 = "其他" if 最终调整过后的 == ""

	keep share 最终调整过后的

	* Categories with no population in the consistent USA-CHN classification get share 0
	replace share = 0 if missing(share)
	collapse (sum) share, by(最终调整过后的)

	gen year    = `yr'
	gen country = "USA"

	append using tabledata1
	save tabledata1, replace
}


*-------------------------------------------------------------------------------*
*					Step1: Generate Data
*					(2) Within Occupations
*-------------------------------------------------------------------------------
clear all

* Empty accumulator file for the within-occupation data
set obs 0
save tabledata2, replace emptyok

* Chinese census data: recode both the occupation and the industry category
foreach yr of numlist 2000 2010 2015 {
	use "`yr'census_consistent.dta", clear

	* Record counts per occupation code and industry code
	gen byte one = 1
	collapse (count) id_new = one, by(consistent title_consistent new new_title)

	* Attach the industry and occupation crosswalks, keeping matched rows only
	merge m:1 new new_title using "cic_acsind_chn.dta", keep(match) nogenerate
	merge m:1 consistent title_consistent using "CHN-SOC-consistent.dta", keep(match) nogenerate

	* Re-aggregate the counts to the crosswalk industry, occupation and sector
	collapse (sum) id_new, by(最终调整 最终调整过后的 部门分类)
	rename (最终调整 最终调整过后的 部门分类) (adjcountry_ind adjcountry_occ adjcountry_sec)
	compress

	* Number of industry rows within each occupation, and that number as a
	* percent of all distinct industries in the data
	bysort adjcountry_occ: egen variety = count(adjcountry_ind)
	unique adjcountry_ind
	gen variety_share = variety / (`r(unique)') * 100

	* Industry employment share within the occupation
	bysort adjcountry_occ: egen occ_pop = total(id_new)
	gen ind_share = id_new / occ_pop * 100

	* Sector employment share within the occupation
	bysort adjcountry_occ adjcountry_sec: egen sec_pop = total(id_new)
	gen sec_share = sec_pop / occ_pop * 100

	* Largest industry share and largest sector share within the occupation
	bysort adjcountry_occ: egen max_ind = max(ind_share)
	bysort adjcountry_occ: egen max_sec = max(sec_share)

	* Drop the raw counts; only categories, shares and summary measures remain
	drop id_new occ_pop sec_pop
	duplicates drop

	gen year    = `yr'
	gen country = "CHN"

	append using tabledata2
	save tabledata2, replace
}


* USA ACS data: change occ category and industry category
*
* For each ACS year: count records by industry code and occupation code, map
* both onto the crosswalk categories, compute the within-occupation industry
* and sector measures, and append the result to tabledata2.
foreach j of numlist 2000 2010 2015 {

	* Sample code of this ACS year, and the width the industry code is padded to
	* (3 characters for 2000, 4 characters for 2010 and 2015)
	local acs_sample = cond(`j' == 2000, 200004, cond(`j' == 2010, 201001, 201501))
	local ind_width  = cond(`j' == 2000, 3, 4)

	* Read only the rows of this sample and only the variables used below,
	* instead of loading the whole multi-year extract on every pass
	use occsoc age ind sample if sample == `acs_sample' ///
		using "$App_data\ACS\usa_00003.dta", clear

	* Drop the unemployed: strip blanks, then drop code 0 and empty codes
	replace occsoc = subinstr(occsoc, " ", "", .)
	drop if occsoc == "0" | occsoc == ""

	* Ages 15-64
	keep if age >= 15 & age <= 64

	* Industry code as text; codes one character short get a leading zero
	gen ind_code = string(ind)
	replace ind_code = "0" + ind_code if strlen(ind_code) == `ind_width' - 1

	* Record counts per industry code and occupation code
	* NOTE(review): the counts are unweighted record counts - confirm that no
	* person weight should be applied.
	gen num = _n
	collapse (count) num, by(ind_code occsoc)

	* Occupation crosswalk: matched rows only. Matched rows whose category is
	* empty are pooled under one catch-all label.
	merge m:1 occsoc using "USA-SOC-consistent`j'.dta", keep(match) nogenerate
	replace 最终调整过后的 = "其他" if 最终调整过后的 == ""

	* Industry crosswalk, keyed on a year-specific variable name: matched rows only
	rename ind_code IND`j'
	merge m:1 IND`j' using "cic_acsind_usa`j'.dta", keep(match) nogenerate

	* Re-aggregate the counts to the crosswalk industry, occupation and sector
	collapse (sum) num, by(最终调整过后的 最终调整 部门分类)
	rename (最终调整 最终调整过后的 部门分类) (adjcountry_ind adjcountry_occ adjcountry_sec)
	compress

	* Number of industry rows within each occupation, and that number as a
	* percent of all distinct industries in the data
	bysort adjcountry_occ: egen variety = count(adjcountry_ind)
	unique adjcountry_ind
	gen variety_share = variety / (`r(unique)') * 100

	* Industry employment share within the occupation
	bysort adjcountry_occ: egen occ_pop = total(num)
	gen ind_share = num / occ_pop * 100

	* Sector employment share within the occupation
	bysort adjcountry_occ adjcountry_sec: egen sec_pop = total(num)
	gen sec_share = sec_pop / occ_pop * 100

	* Largest industry share and largest sector share within the occupation
	bysort adjcountry_occ: egen max_ind = max(ind_share)
	bysort adjcountry_occ: egen max_sec = max(sec_share)

	keep adjcountry_ind adjcountry_occ adjcountry_sec ind_share sec_share variety variety_share max*
	duplicates drop

	gen year    = `j'
	gen country = "USA"

	append using tabledata2
	save tabledata2, replace
}


*-------------------------------------------------------------------------------
*					Step2: Calculation
*					Panel A: Across Occupations
*-------------------------------------------------------------------------------
clear all

* Empty file that accumulates the final table
set obs 0
save table, replace emptyok

* Define the different occupation samples: stack two copies of the shares.
* The first copy is the full sample (tag All); the second copy is split into
* the listed agricultural occupations (tag ag) and all others (tag no_ag).
use tabledata1, clear
gen tag = "All"
append using tabledata1

replace tag = "no_ag" if tag != "All"
replace tag = "ag" if tag != "All" & inlist(最终调整过后的, ///
	"All Miscellaneous agricultural workers including animal breeders", ///
	"木材采运人员", ///
	"Forest and Conservation Workers", ///
	"兽医、动物疫情和特种动物技术人员", ///
	"Fishing And Hunting Workers", ///
	"环境治理服务人员包括农村")

* One row per category, country and sample, with one share column per year
reshape wide share, i(最终调整过后的 country tag) j(year)

foreach yr of numlist 2000 2010 2015 {
	replace share`yr' = 0 if share`yr' == .
}

* Absolute change in share between 2000 and 2015, summed and averaged
* within country and sample
gen d_share0015 = abs(share2015 - share2000)
bysort country tag: egen D_t     = total(d_share0015)
bysort country tag: egen Ave_D_t = mean(d_share0015)

* Relative change: absolute change divided by the average of the two shares
* (missing when both shares are zero, and then left out of the mean)
gen Red_share0015 = abs((share2015 - share2000) / (share2015 + share2000) * 2)
bysort country tag: egen Ave_ReD_t = mean(Red_share0015)

* Keep one row per country and sample
keep country D_t Ave_D_t Ave_ReD_t tag
duplicates drop

append using table
save table, replace


*-------------------------------------------------------------------------------
*					Step2: Calculation
*					Panel B: Within Occupations
*-------------------------------------------------------------------------------
* Same sample definition as in Panel A: a full copy (tag All) plus a second
* copy split into the listed agricultural occupations (ag) and the rest (no_ag)
use tabledata2, clear
gen tag = "All"
append using tabledata2

replace tag = "no_ag" if tag != "All"
replace tag = "ag" if tag != "All" & inlist(adjcountry_occ, ///
	"All Miscellaneous agricultural workers including animal breeders", ///
	"木材采运人员", ///
	"Forest and Conservation Workers", ///
	"兽医、动物疫情和特种动物技术人员", ///
	"Fishing And Hunting Workers", ///
	"环境治理服务人员包括农村")

* Level statistics by year
preserve
	* HHI over industries: squared industry shares summed within the occupation
	gen o_share_jt = (ind_share / 100)^2

	* HHI over sectors: each sector is counted once per occupation, so only the
	* first row of every occupation-sector cell carries the squared sector share
	bysort adjcountry_occ adjcountry_sec year country tag: gen byte first_in_sec = (_n == 1)
	gen o_share_st = cond(first_in_sec, (sec_share / 100)^2, 0)

	bysort year country adjcountry_occ tag: egen HHI_ind = total(o_share_jt)
	bysort year country adjcountry_occ tag: egen HHI_sec = total(o_share_st)

	* NOTE(review): the means are taken over rows, so an occupation with more
	* industry rows carries more weight - confirm this is intended.
	collapse (mean) variety variety_share max_ind max_sec HHI*, by(year country tag)

	reshape wide variety variety_share max_ind max_sec HHI*, i(country tag) j(year)

	merge 1:1 country tag using table, nogenerate
	save table, replace
restore

* Relative change 2000-2015 in the industry shares and in the sector shares
foreach dim in "ind" "sec" {
	preserve
		keep adjcountry_occ adjcountry_`dim' `dim'_share year country tag
		duplicates drop
		drop if year == 2010

		reshape wide `dim'_share, i(adjcountry_occ adjcountry_`dim' country tag) j(year)

		foreach yr of numlist 2000 2015 {
			replace `dim'_share`yr' = 0 if `dim'_share`yr' == .
		}

		* Absolute change divided by the average of the two shares
		gen d_share0015 = abs((`dim'_share2015 - `dim'_share2000) / (`dim'_share2015 + `dim'_share2000) * 2)

		* Mean within occupation, then mean of that value within country and sample
		* NOTE(review): the second mean runs over rows, not over occupations, so
		* occupations with more rows carry more weight - confirm this is intended.
		bysort adjcountry_occ country tag: egen occ_mean = mean(d_share0015)
		bysort country tag: egen Ave_ReD`dim'_t = mean(occ_mean)

		keep country Ave_ReD`dim'_t tag
		duplicates drop
		merge 1:1 country tag using table, nogenerate
		save table, replace
	restore
}

* Arrange the final table for display
use table, clear

* Of the year-specific statistics only the 2015 values are shown
drop *2000 *2010

format D_t Ave_D_t Ave_ReD_t variety_share2015 max_ind2015 max_sec2015 HHI_ind2015 HHI_sec2015 Ave_ReDind_t Ave_ReDsec_t %9.2f

* The number of industries is shown as a whole number
replace variety2015 = round(variety2015)

* Give every statistic a common prefix so that reshape long can stack them;
* the two key variables get their original names back
rename * index*
rename (indexcountry indextag) (country tag)

* Long by statistic, then one column per country
reshape long index, i(country tag) j(stat) string
reshape wide index, i(stat tag) j(country) string
rename index* *

* China relative to the USA
gen ratio = CHN / USA
format ratio %9.2f

* One row per statistic, with the country and ratio columns repeated per sample
reshape wide CHN USA ratio, i(stat) j(tag) string

* Put the rows into the order of the printed table
sortobs stat, values("Ave_D_t" "D_t" "Ave_ReD_t" "variety2015" "variety_share2015" "max_ind2015" "max_sec2015" "HHI_ind2015" "HHI_sec2015" "Ave_ReDind_t" "Ave_ReDsec_t")

list _all

* Remove the intermediate data files
erase tabledata1.dta
erase tabledata2.dta

log close



